package com.guiji.quartz.domain;

import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpResponse;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.guiji.quartz.task.DataSaveUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * Scraper for Tech in Asia: downloads a fixed set of post feeds from the site's
 * wp-json API, reduces each post body to tag-free text and hands the result to
 * {@code DataSaveUtil.saveData}.
 *
 * @program: cms-vue-plus
 * @author: gaoX
 * @date 2021/11/26 18:24
 */
public class Techinasia {
	private static final Logger log = LoggerFactory.getLogger(Techinasia.class);

	/** Feeds fetched on every run, in this order. */
	private static final String[] FEED_URLS = {
		"https://www.techinasia.com/wp-json/techinasia/2.0/posts?page=1&per_page=10",
		"https://www.techinasia.com/wp-json/techinasia/2.0/categories/startups/posts?page=1&per_page=9",
		"https://www.techinasia.com/wp-json/techinasia/2.0/categories/news/posts?page=1&per_page=9"
	};

	/** Connection timeout for each feed request, in milliseconds. */
	private static final int CONNECT_TIMEOUT_MS = 15000;

	/** Posts whose cleaned content is longer than this are not saved. */
	private static final int MAX_CONTENT_LENGTH = 50000;

	/** Posts with fewer than this many non-blank, non-'$' characters are not saved. */
	private static final int MIN_TEXT_LENGTH = 20;

	/** Source identifier passed to {@code DataSaveUtil.saveData} for every post. */
	private static final String SOURCE_SITE = "https://www.techinasia.com/category/startups||news";

	/**
	 * Fetches every feed in {@link #FEED_URLS} and saves the posts it contains.
	 * A feed that cannot be downloaded or parsed is logged and skipped so that the
	 * remaining feeds are still processed.
	 *
	 * @throws Exception kept for compatibility with existing callers; failures of
	 *                   individual feeds or posts are logged rather than thrown
	 */
	public void mainMethod() throws Exception {
		for (String url : FEED_URLS) {
			String resJson = fetch(url);
			if (resJson == null) {
				continue;
			}
			// NOTE(review): the general "/posts" feed does not contain "news" and is
			// therefore labelled StartUps as well - confirm this is intended.
			String category = url.contains("news") ? "Techinasia-AsianTech" : "Techinasia-StartUps";
			getContent(resJson, category);
		}
	}

	/**
	 * Downloads one feed.
	 *
	 * @param url feed URL to request
	 * @return the response body, or {@code null} when the request failed or the
	 *         server answered with a non-2xx status (the reason is logged)
	 */
	private static String fetch(String url) {
		// NOTE(review): only the connect timeout is set; a stalled read is not bounded.
		// try-with-resources releases the underlying connection on every path.
		try (HttpResponse response = HttpRequest.get(url)
				.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
				.header("Accept-Encoding", "gzip, deflate")
				.setConnectionTimeout(CONNECT_TIMEOUT_MS)
				.execute()) {
			if (!response.isOk()) {
				log.error("Techinasia request failed, status={} url={}", response.getStatus(), url);
				return null;
			}
			return response.body();
		} catch (Exception e) {
			// Pass the exception as the last argument so the stack trace is logged.
			log.error("代理失败1 url={}", url, e);
			return null;
		}
	}

	/**
	 * Parses a feed response and saves every usable post found under its
	 * {@code "posts"} array.
	 *
	 * @param resJson  raw JSON body of a feed response
	 * @param category category label stored with each saved post
	 */
	private static void getContent(String resJson, String category) {
		Object rawPosts;
		try {
			rawPosts = JSONUtil.parseObj(resJson).get("posts");
		} catch (Exception e) {
			log.error("获取post出错", e);
			return;
		}
		// A missing or non-array "posts" value must not reach the loop below.
		if (!(rawPosts instanceof JSONArray)) {
			log.error("获取post出错");
			return;
		}

		for (Object post : (JSONArray) rawPosts) {
			String title;
			String content;
			String link;
			try {
				JSONObject jo = JSONUtil.parseObj(post);
				title = (String) jo.get("title");
				content = (String) jo.get("content");
				link = (String) jo.get("link");
			} catch (Exception e) {
				log.warn("Techinasia: skipping post with unexpected structure", e);
				continue;
			}
			if (content == null) {
				log.warn("Techinasia: skipping post without content, link={}", link);
				continue;
			}

			content = cleanContent(content);
			if (content.length() > MAX_CONTENT_LENGTH
					|| content.replaceAll("\\$|[\\s\\p{Zs}]", "").length() < MIN_TEXT_LENGTH) {
				continue;
			}

			// No images are collected for this source; an empty list is passed on.
			List<Object> newImgs = new ArrayList<>();
			try {
				DataSaveUtil.saveData(title, content, newImgs, link, new Date(), SOURCE_SITE, category);
				log.info("{}-----------------Techinasia----------------", Thread.currentThread().getName());
			} catch (Exception e) {
				log.error("Techinasia入库出错", e);
			}
		}
	}

	/**
	 * Converts a post's HTML body to the stored text form: every {@code </p>}
	 * becomes the marker {@code $$$}, all whitespace is removed and the remaining
	 * tags are dropped.
	 *
	 * @param html raw HTML of the post body, not {@code null}
	 * @return the cleaned text
	 */
	private static String cleanContent(String html) {
		// NOTE(review): the three patterns containing a space ("<p style=", "<p id=",
		// "<div class=") cannot match here, because "</p>" and all whitespace have
		// already been removed by the two preceding steps. The order is kept as-is so
		// stored content does not change; if those blocks are meant to be dropped they
		// have to run first - verify against real responses before reordering.
		return html.replaceAll("</p>", "\\$\\$\\$").replaceAll("\n|\\s", "")
			.replaceAll("<p style=.*?</p>", "").replaceAll("<p id=.*?</p>", "")
			.replaceAll("<div class=.*?</div>", "").replaceAll("<.*?>", "");
	}

}
